home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Aminet 24
/
Aminet 24 (1998)(GTI - Schatztruhe)[!][Apr 1998].iso
/
Aminet
/
comm
/
mail
/
Mutt089src.lha
/
Mutt-0.89i-AMIGA
/
src
/
rx
/
rxgnucomp.c
< prev
next >
Wrap
C/C++ Source or Header
|
1998-01-28
|
44KB
|
1,666 lines
/* Copyright (C) 1992, 1993, 1994, 1995 Free Software Foundation, Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Library General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public License
* along with this software; see the file COPYING. If not, write to
* the Free Software Foundation, 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*/
#include <sys/types.h>
#include "rxall.h"
#include "rxgnucomp.h"
#include "inst-rxposix.h"
/* {A Syntax Table}
*/
/* Define the syntax basics for \<, \>, etc.
*/
#ifndef emacs
/* Number of bits per character; determines the size of the syntax table. */
#define CHARBITS 8
/* Number of distinct character codes, i.e. one table slot per code. */
#define CHAR_SET_SIZE (1 << CHARBITS)
/* Syntax class stored for characters that init_syntax_once marks as
 * word constituents (letters, digits and `_').
 */
#define Sword 1
/* Syntax class of character code C.  C is used directly as the array
 * index, so it must already be in the range 0 .. CHAR_SET_SIZE - 1
 * (NOTE(review): a plain `char' holding a negative value would index
 * out of bounds -- confirm callers pass unsigned values).
 */
#define SYNTAX(c) re_syntax_table[c]
/* Per-character syntax classes; filled in by init_syntax_once. */
char re_syntax_table[CHAR_SET_SIZE];
/* Populate re_syntax_table the first time this is called; later calls
 * return immediately.  The table is cleared with rx_bzero and then the
 * entries for `_', `0'..`9', `A'..`Z' and `a'..`z' are set to Sword.
 */
#ifdef __STDC__
static void
init_syntax_once (void)
#else
static void
init_syntax_once ()
#endif
{
  static int initialized = 0;
  register int ch;

  if (initialized)
    return;

  /* Start from an all-zero table: every character is a non-word
   * character unless marked below.
   */
  rx_bzero ((char *)re_syntax_table, sizeof re_syntax_table);

  re_syntax_table['_'] = Sword;

  ch = '0';
  while (ch <= '9')
    re_syntax_table[ch++] = Sword;

  ch = 'A';
  while (ch <= 'Z')
    re_syntax_table[ch++] = Sword;

  ch = 'a';
  while (ch <= 'z')
    re_syntax_table[ch++] = Sword;

  initialized = 1;
}
#endif /* not emacs */
/* Human-readable messages for the regex error codes.  Each entry is
 * annotated with the REG_* code it describes.  The first entry is a
 * null pointer rather than a string.
 * NOTE(review): the entries are presumably listed in the same order as
 * the REG_* enumeration so that the code can be used as the index --
 * confirm against the header that declares those codes before adding
 * or reordering entries.
 */
const char *rx_error_msg[] =
{
0, /* REG_NOUT */
"No match", /* REG_NOMATCH */
"Invalid regular expression", /* REG_BADPAT */
"Invalid collation character", /* REG_ECOLLATE */
"Invalid character class name", /* REG_ECTYPE */
"Trailing backslash", /* REG_EESCAPE */
"Invalid back reference", /* REG_ESUBREG */
"Unmatched [ or [^", /* REG_EBRACK */
"Unmatched ( or \\(", /* REG_EPAREN */
"Unmatched \\{", /* REG_EBRACE */
"Invalid content of \\{\\}", /* REG_BADBR */
"Invalid range end", /* REG_ERANGE */
"Memory exhausted", /* REG_ESPACE */
"Invalid preceding regular expression", /* REG_BADRPT */
"Premature end of regular expression", /* REG_EEND */
"Regular expression too big", /* REG_ESIZE */
"Unmatched ) or \\)", /* REG_ERPAREN */
};
/*
* Macros used while compiling patterns.
*
* By convention, PEND points just past the end of the uncompiled pattern,
* P points to the read position in the pattern. `translate' is the name
* of the translation table (`TRANSLATE' is the name of a macro that looks
* things up in `translate').
*/
/*
 * Fetch the next character in the uncompiled pattern---translating it
 * if necessary.  Also cast from a signed character in the constant
 * string passed to us by the user to an unsigned char that we can use
 * as an array index (in, e.g., `translate').
 *
 * This expands in place and uses the variables `p', `pend' and
 * `translate' of the enclosing function.  If the pattern is already
 * exhausted (p == pend) it makes the enclosing function return
 * REG_EEND.  C is assigned twice and read once, so it must be a plain
 * lvalue without side effects.
 */
#define PATFETCH(c) \
do {if (p == pend) return REG_EEND; \
c = (unsigned char) *p++; \
c = translate[c]; \
} while (0)
/*
 * Fetch the next character in the uncompiled pattern, with no
 * translation.  Like PATFETCH, this returns REG_EEND from the
 * enclosing function when p == pend, and advances `p' otherwise.
 */
#define PATFETCH_RAW(c) \
do {if (p == pend) return REG_EEND; \
c = (unsigned char) *p++; \
} while (0)
/* Go backwards one character in the pattern.  No bounds check is
 * performed: the caller must know that `p' is not at the start.
 */
#define PATUNFETCH p--
/* Look D up in the `translate' table of the enclosing scope, casting
 * it to unsigned char first so a negative `char' cannot index before
 * the table.
 */
#define TRANSLATE(d) translate[(unsigned char) (d)]
/* Type used for the `regnum' field saved in compile-stack entries
 * (NOTE(review): presumably a subexpression/register number -- confirm
 * against regex_compile).
 */
typedef int regnum_t;
/* Since offsets can go either forwards or backwards, this type needs to
 * be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.
 */
typedef int pattern_offset_t;
/* One entry of the compile stack: a snapshot of several parser
 * positions plus a register number.
 * NOTE(review): presumably one entry is pushed per open group and
 * restored when the group closes -- confirm against regex_compile,
 * which is not visible from here.
 */
typedef struct
{
struct rexp_node ** top_expression;
struct rexp_node ** last_expression;
struct rexp_node ** last_non_regular_expression;
pattern_offset_t inner_group_offset;
regnum_t regnum;
} compile_stack_elt_t;
/* The compile stack itself: an array of entries plus bookkeeping.
 * `size' is the capacity that COMPILE_STACK_FULL compares `avail'
 * against; `avail' is the number of entries currently in use.
 */
typedef struct
{
compile_stack_elt_t *stack;
unsigned size;
unsigned avail; /* Offset of next open position. */
} compile_stack_type;
/* NOTE(review): presumably the number of entries allocated for a fresh
 * compile stack -- confirm at the allocation site.
 */
#define INIT_COMPILE_STACK_SIZE 32
/* The macros below all operate on a variable named `compile_stack'
 * (a compile_stack_type) that must be in scope where they are used.
 */
/* True when no entries are in use. */
#define COMPILE_STACK_EMPTY (compile_stack.avail == 0)
/* True when every slot up to `size' is in use, i.e. the next push
 * would need more room.
 */
#define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size)
/* The next available element.  Note that this is the slot at index
 * `avail' -- the first unused one -- not the most recently pushed entry.
 */
#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
/* Set the bit for character C in a list.
 *
 * The list is the bitmap `b' of the enclosing scope: character code N
 * lives in bit (N % CHARBITS) of element (N / CHARBITS).  C is
 * converted to unsigned char first, so any integer expression is
 * reduced to the range 0..255 before indexing.
 *
 * C is parenthesized in both uses so the cast applies to the whole
 * argument; without that, an argument such as `x - y' would cast only
 * `x' and could yield a negative shift count.  C is evaluated twice,
 * so it must not have side effects.
 */
#define SET_LIST_BIT(c) \
  (b[((unsigned char) (c)) / CHARBITS] \
   |= 1 << (((unsigned char) (c)) % CHARBITS))
/* Get the next unsigned number in the uncompiled pattern.
 *
 * Reads decimal digits through PATFETCH into the enclosing function's
 * variable `c' and accumulates their value in NUM.  If NUM is negative
 * when the first digit is seen it is reset to 0 first; if no digit is
 * found NUM is left untouched (NOTE(review): callers presumably
 * pre-set NUM to a negative value to detect "no number" -- confirm at
 * the call sites).
 *
 * On exit `c' holds the last character fetched: the first non-digit,
 * or the final digit when the pattern ended, and `p' has advanced past
 * it.  Nothing is fetched when p == pend on entry.
 *
 * Caveats: this expands to a bare brace block, not a do/while (0), so
 * it cannot be followed by `;' directly before an `else'.  NUM is
 * evaluated several times.  There is no overflow check on
 * NUM * 10 + digit, so a very long digit string overflows NUM.
 * `isdigit' must be declared where the macro is used.
 */
#define GET_UNSIGNED_NUMBER(num) \
{ if (p != pend) \
{ \
PATFETCH (c); \
while (isdigit (c)) \
{ \
if (num < 0) \
num = 0; \
num = num * 10 + c - '0'; \
if (p == pend) \
break; \
PATFETCH (c); \
} \
} \
}
/* NOTE(review): presumably the longest character-class name the
 * bracket parser will buffer -- confirm where the name is collected.
 */
#define CHAR_CLASS_MAX_LENGTH 64
/* Nonzero (1) when STRING is exactly one of the twelve recognized
 * character-class names, 0 otherwise.  The comparison is
 * case-sensitive.  STRING is evaluated once per comparison performed,
 * so it must not have side effects.
 */
#define IS_CHAR_CLASS(string) \
  (strcmp ((string), "alpha") == 0 \
   || strcmp ((string), "upper") == 0 \
   || strcmp ((string), "lower") == 0 \
   || strcmp ((string), "digit") == 0 \
   || strcmp ((string), "alnum") == 0 \
   || strcmp ((string), "xdigit") == 0 \
   || strcmp ((string), "space") == 0 \
   || strcmp ((string), "print") == 0 \
   || strcmp ((string), "punct") == 0 \
   || strcmp ((string), "graph") == 0 \
   || strcmp ((string), "cntrl") == 0 \
   || strcmp ((string), "blank") == 0)
/* These predicates are used in regex_compile. */
/* Decide whether a `^' is in a position where it acts as a
 * beginning-of-line anchor.
 *
 * PATTERN is the start of the pattern and P points just past the `^';
 * the caller must guarantee that at least one character precedes the
 * `^'.  Returns 1 when the character before the `^' opens a group or
 * introduces an alternative under SYNTAX, and 0 otherwise.  A `(' or
 * `|' counts if it is an operator without a backslash (RE_NO_BK_PARENS
 * or RE_NO_BK_VBAR set) or if it is preceded by a backslash.
 */
#ifdef __STDC__
static int
at_begline_loc_p (const char *pattern, const char * p, unsigned long syntax)
#else
static int
at_begline_loc_p (pattern, p, syntax)
const char *pattern;
const char * p;
unsigned long syntax;
#endif
{
  /* p[-1] is the `^' itself, so p[-2] is the character before it. */
  const char *before_caret = p - 2;
  int escaped = 0;

  /* Only look one further back when that stays inside the pattern. */
  if (before_caret > pattern && before_caret[-1] == '\\')
    escaped = 1;

  switch (*before_caret)
    {
    case '(':
      /* After a subexpression? */
      return ((syntax & RE_NO_BK_PARENS) || escaped) ? 1 : 0;

    case '|':
      /* After an alternative? */
      return ((syntax & RE_NO_BK_VBAR) || escaped) ? 1 : 0;

    default:
      return 0;
    }
}
/* Counterpart of at_begline_loc_p for `$': decide whether a `$' is in
 * a position where it acts as an end-of-line anchor.
 *
 * P points just past the `$' and PEND just past the end of the
 * pattern; the caller must guarantee P < PEND because *P is read
 * unconditionally.  Returns 1 when what follows closes a group or
 * introduces an alternative under SYNTAX, and 0 otherwise.  With
 * RE_NO_BK_PARENS / RE_NO_BK_VBAR set the bare `)' / `|' is the
 * operator; otherwise the backslashed form is.
 */
#ifdef __STDC__
static int
at_endline_loc_p (const char *p, const char *pend, int syntax)
#else
static int
at_endline_loc_p (p, pend, syntax)
const char *p;
const char *pend;
int syntax;
#endif
{
  const char first = *p;
  /* Second character after the `$', or null when the pattern ends
   * right after the first one.
   */
  const char *second = (p + 1 < pend) ? (p + 1) : 0;
  int closes_group;

  /* Before a subexpression? */
  if (syntax & RE_NO_BK_PARENS)
    closes_group = (first == ')');
  else
    closes_group = (first == '\\' && second && *second == ')');

  if (closes_group)
    return 1;

  /* Before an alternative? */
  if (syntax & RE_NO_BK_VBAR)
    return first == '|';

  return first == '\\' && second && *second == '|';
}
unsigned char rx_id_translation[256] =
{
0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
160, 161, 162, 163, 164, 165, 166, 167, 168, 169,
170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
180, 181, 182, 183, 184, 185, 186, 187, 188, 189,
190, 191, 192, 193, 194, 195, 196, 197, 198, 199,
200, 201, 202